/************************************************************************** * * * Copyright (C) 1993, Silicon Graphics, Inc. * * * * These coded instructions, statements, and computer programs contain * * unpublished proprietary information of Silicon Graphics, Inc., and * * are protected by Federal copyright law. They may not be disclosed * * to third parties or copied or duplicated in any form, in whole or * * in part, without the prior written consent of Silicon Graphics, Inc. * * * **************************************************************************/ /************************************************************************** | This sample IRIX device driver implements a "ram disk" -- a block of | kernel memory accessed as if it were a disk. The driver supports both | block and character interfaces and is loadable and unloadable. | | N N OO TTTTT EEEEE It does not make sense to use a ram disk | NN N O O T E in a system like IRIX that implements | N N N O O T EEE :: effective virtual memory. This device | N NN O O T E driver is useful as an example because | N N OO T EEEEE :: it has no hardware dependencies, and so | can be tried out in any IRIX system. | However, this driver SHOULD NOT be employed in a production system! | It WILL NOT give better performance. It WILL consume kernel memory | that would be better used for buffers. |**************************************************************************/ #include <sys/ddi.h> /* gets also sys/types.h and sys/buf.h */ #include <sys/conf.h> /* for driver flags D_MP etc */ #include <sys/kmem.h> /* kmem_alloc and friends */ #include <sys/sema.h> /* the rd_info_t contains a semaphore */ #include <sys/dvh.h> /* the rd_info_t contains struct volume_header */ #include "ramdrive.h" /* declare rd_info_t, etc. 
*/
#include <sys/edt.h>     /* declare edt_t for edtinit() */
#include <sys/errno.h>   /* error codes to return */
#include <sys/cmn_err.h> /* cmn_err() and related constants */
#include <sys/cred.h>    /* cred_t for prototypes */
#include <sys/dkio.h>    /* DIOC* command constants for ioctl */
#include <sys/param.h>   /* NBPSCTR bytes per sector */
#include <sys/immu.h>    /* NBPP bytes per page, btod() */
#include <sys/file.h>    /* FEXCL and other open flags */
#include <sys/open.h>    /* OTYP_CHR, OTYP_BLK */
#include <sys/region.h>  /* for vhandl_t */
#include <sys/mload.h>   /* only for M_VERSION */
/**************************************************************************
| Debug display macros: one each for cmn_err calls with 0, 1, 2, 3, or 4
| variable arguments.  When DEBUG is not defined they expand to nothing,
| so their arguments are not evaluated at all.
|**************************************************************************/
#ifdef DEBUG
#define DBGMSG0(s) cmn_err(CE_DEBUG,s)
#define DBGMSG1(s,x) cmn_err(CE_DEBUG,s,x)
#define DBGMSG2(s,x,y) cmn_err(CE_DEBUG,s,x,y)
#define DBGMSG3(s,x,y,z) cmn_err(CE_DEBUG,s,x,y,z)
#define DBGMSG4(s,x,y,z,w) cmn_err(CE_DEBUG,s,x,y,z,w)
#else
#define DBGMSG0(s)
#define DBGMSG1(s,x)
#define DBGMSG2(s,x,y)
#define DBGMSG3(s,x,y,z)
#define DBGMSG4(s,x,y,z,w)
#endif
/**************************************************************************
| Driver flag: this driver is MP-safe. Also version flag for mload.
|**************************************************************************/
unsigned rd_devflag = D_MP;
char *rd_mversion = M_VERSION;
/**************************************************************************
| Array of rd_info_t objects, one per allowed minor device. We rely on
| the loader to ensure these static globals are zero until initialized!
| Also defined: two convenience macros for frequent expressions.
|**************************************************************************/ static rd_info_t *rd_array; #define INFOPTR(dev) &rd_array[geteminor(dev)] #define VALIDIO(prd,off,len) (((off_t)(off) + (off_t)(len)) <= prd->size) /************************************************************************** | rd_basic() is called from rd_edtinit() to allocate the rd_array based | on the global rd_numdevs, an integer set to ##D in the configuration | file /var/sysgen/master.d/ramdrive. Also display the other available | globals for debugging purposes. |**************************************************************************/ extern int rd_e_major, rd_numdevs, rd_ctrlrs; int rd_basic(void) { if (!rd_array) { register int size; DBGMSG3("ramdrive basic: ##E=%d, ##D=%d, ##C=%d\n", rd_e_major, rd_numdevs, rd_ctrlrs); if (size = rd_numdevs*sizeof(rd_info_t)) rd_array = (rd_info_t *)kmem_zalloc(size,KM_SLEEP); else cmn_err(CE_ALERT,"ramdrive: confused"); } return (0 != rd_array); } /************************************************************************** | rd_init() is included solely to demonstrate that this entry point | can be called in addition to rd_edtinit() and rd_start(). |**************************************************************************/ int rd_init(void) { DBGMSG0("rd_init entry point called\n"); return 0; } /************************************************************************** | rd_start() is included solely to demonstrate that it, too can be called | in addition to rd_edtinit() and rd_init(). |**************************************************************************/ int rd_start(void) { DBGMSG0("rd_start entry point called\n"); return 0; } /************************************************************************** | rd_format() is a subroutine of both rd_edtinit() and rd_ioctl() which | "formats" the ramdrive to zeros with a reasonable volume header. 
| The volume header (set in both the info struct and "sector 0")
| describes standard SGI partitions:
|       10 == the whole "drive"
|        8 == the volume header, only one sector in this case
|        7 == all sectors except the volume header
|        0 == data ("root") same as 7
|        1 == swap contains 0 sectors
| For verisimilitude we arbitrarily say we have 1 track/cylinder
| and 8 sectors/track. This assumes that nsectors is a multiple of 8,
| which is a good bet when the allocated size is a multiple of IO pages
| and sectors are 512 bytes.
|
| prd->base and prd->size must already describe the allocated memory.
|**************************************************************************/
void
rd_format(register rd_info_t *prd)
{
    /* Partition-table slots and fake geometry used by the simulated disk */
    enum { RD_PT_ROOT = 0, RD_PT_SWAP = 1, RD_PT_DATA = 7,
           RD_PT_VOLHDR = 8, RD_PT_VOLUME = 10 };
    enum { RD_TRKS_PER_CYL = 1, RD_SECS_PER_TRK = 8 };

    register struct volume_header *hdr = &prd->vh;
    register int total = btod(prd->size);       /* immu.h */

    /* Start from an all-zero header so unused fields and csum are 0 */
    bzero((void *)hdr,sizeof(struct volume_header));

    hdr->vh_magic  = VHMAGIC;                   /* in sys/dvh.h */
    hdr->vh_rootpt = RD_PT_ROOT;
    hdr->vh_swappt = RD_PT_SWAP;

    /* Device parameters: invented geometry */
    hdr->vh_dp.dp_interleave = 1;
    hdr->vh_dp.dp_secbytes   = NBPSCTR;         /* param.h */
    hdr->vh_dp.dp_secs       = RD_SECS_PER_TRK; /* sectors/track */
    hdr->vh_dp.dp_trks0      = RD_TRKS_PER_CYL; /* tracks/cyl */
    hdr->vh_dp.dp_cyls       = total/RD_SECS_PER_TRK; /* cylinders */

    /* Whole volume */
    hdr->vh_pt[RD_PT_VOLUME].pt_type     = PTYPE_VOLUME;
    hdr->vh_pt[RD_PT_VOLUME].pt_firstlbn = 0;
    hdr->vh_pt[RD_PT_VOLUME].pt_nblks    = total;

    /* Volume header: sector 0 only */
    hdr->vh_pt[RD_PT_VOLHDR].pt_type     = PTYPE_VOLHDR;
    hdr->vh_pt[RD_PT_VOLHDR].pt_firstlbn = 0;
    hdr->vh_pt[RD_PT_VOLHDR].pt_nblks    = 1;

    /* Everything after the volume header; the root partition is a copy */
    hdr->vh_pt[RD_PT_DATA].pt_type       = PTYPE_RAW;
    hdr->vh_pt[RD_PT_DATA].pt_firstlbn   = 1;
    hdr->vh_pt[RD_PT_DATA].pt_nblks      = total-1;
    hdr->vh_pt[RD_PT_ROOT] = hdr->vh_pt[RD_PT_DATA];

    /* Empty swap partition positioned at the end of the volume */
    hdr->vh_pt[RD_PT_SWAP].pt_type       = PTYPE_RAW;
    hdr->vh_pt[RD_PT_SWAP].pt_firstlbn   = total;
    hdr->vh_pt[RD_PT_SWAP].pt_nblks      = 0;

    /* Checksum is taken last, over the completed header */
    hdr->vh_csum = -vh_checksum(hdr);

    bzero(prd->base,prd->size);                 /* clear all sectors */
    bcopy(hdr,prd->base,sizeof(prd->vh));       /* vh in sec 0 */
}

/**************************************************************************
| rd_edtinit() is called whenever the driver is loaded, once for each
| VECTOR that names this driver.
A typical VECTOR line would be: | VECTOR module=ramdrive ctrl=2 base=0x00040000 | which says, initialize minor number 2 for a size of 256K. |**************************************************************************/ int rd_edtinit(register edt_t *pedt) { register rd_info_t *prd; register __psint_t size; register int nsectors; register int ctlr = pedt->e_ctlr; /* || If this is the first time, allocate the rd_array of info structures. || Exit immediately if that fails. */ if (!rd_basic()) { return ENODEV; } DBGMSG3("ramdrive edtinit bustype %d adap %d ctlr %d\n", pedt->e_bus_type, pedt->e_adap, pedt->e_ctlr); DBGMSG3(" e_space[0] iopaddr %x size %x vaddr %x\n", pedt->e_space[0].ios_iopaddr,pedt->e_space[0].ios_size, pedt->e_space[0].ios_vaddr); /* || Diagnose and reject an invalid minor dev# from VECTOR ctlr= */ if (ctlr > rd_numdevs) { cmn_err(CE_ALERT,"ramdrive: ctlr=%d invalid minor dev#",ctlr); return ENODEV; } /* || Address the info structure and diagnose multiple initialization */ prd = INFOPTR(ctlr); if (prd->base) { cmn_err(CE_ALERT,"ramdrive: duplicate VECTOR for ctlr=%d",ctlr); return EBUSY; } /* || The desired size of the ramdrive is encoded as the base=# value, || which is passed as the ios_vaddr value in the edt_t. || Diagnose 0 size (omitted base=). Round the size to a || multiple of *memory* (not necessarily I/O) pages. */ size = (__psint_t) pedt->e_space[0].ios_vaddr; if ((0 == size)||(-1 == size)) { cmn_err(CE_ALERT, "ramdrive: no size (base=) specified for ctlr=%d",ctlr); return EINVAL; } size = (size + (NBPP-1)) & (-NBPP); /* in sys/immu.h */ /* || Allocate the kernel memory. Report an error if not possible. */ prd->size = size; prd->base = kmem_alloc(size,KM_SLEEP); if (!prd->base) { cmn_err(CE_ALERT, "ramdrive: unable to allocate %x bytes for dev %d",size,ctlr); return ENOMEM; } nsectors = btod(size); /* immu.h bytes to disk sectors */ DBGMSG3("ramdrive: dev# %d allocated %x = %x sectors\n", ctlr,size,nsectors); /* || Initialize the semaphore. 
*/ initnsema(&prd->queue,1,"ramdrive"); /* || Initialize the "volume." */ rd_format(prd); DBGMSG2(" info at 0x%x vh at 0x%x \n", prd, (__psint_t)(&prd->vh) ); return 0; } /************************************************************************** | rd_open() is called for each open() of a character device /dev/ramchr<n>, | and during a mount of a block device /dev/ramblk<n>. We can distinguish | between types of open from the otyp. |**************************************************************************/ int rd_open(dev_t *pdev, int oflag, int otyp, cred_t *pcred) { register rd_info_t *prd = INFOPTR(*pdev); register int error = 0; /* || Make sure the device being opened was initialized by a VECTOR. */ if (!prd->base) { cmn_err(CE_NOTE,"ramdrive: open of uninitialized dev %d",*pdev); return ENODEV; } /* || Seize the device semaphore so that prd->rd_info can be updated || without error on a multiprocessor. */ psema(&prd->queue,PZERO+1 | PCATCH); /* || Implement FEXCL (exclusive) open for a privileged process only. || Exclusivity applies to the entire minor device, under both its || block and character special devices. */ if (oflag & FEXCL) { if (drv_priv(pcred)) /* not privileged */ { DBGMSG0("ramdrive: reject FEXCL with EPERM\n"); error = EPERM; } else if (prd->copen+prd->bopen+prd->nmmap) /* current use? */ { DBGMSG0("ramdrive: reject FEXCL with EBUSY\n"); error = EBUSY; } else { prd->xopen = oflag; /* note device open exclusively */ } } else /* nonexclusive request can be blocked by exclusive open */ { if (prd->xopen) { DBGMSG0("ramdrive: reject normal open for exclusivity\n"); error = EBUSY; } } if (!error) { /* || Count the open so we don't unload with open devices. 
*/ if (otyp & OTYP_CHR) ++prd->copen; else ++prd->bopen; DBGMSG4("ramdrive open: flag %x copen %d bopen %d xopen %d\n", oflag, prd->copen, prd->bopen, prd->xopen); } vsema(&prd->queue); return error; } /************************************************************************** | rd_close() is not called for each close() but for the final close of a | given device (character or block). Clear the respective count of opens | and note whether exclusivity is being given up. Since a close() in | one CPU could happen concurrently with an open() in another CPU, we | need to grab the semaphore before updating the rd_info. | NOTE: the flag passed to close does not contain FEXCL even if it was | given in the flag passed to open. |**************************************************************************/ int rd_close(dev_t dev, int flag, int otyp, cred_t *pcred) { register rd_info_t *prd = INFOPTR(dev); psema(&prd->queue,PZERO+1 | PCATCH); if (flag & FEXCL) { /* this is never entered */ } if (otyp & OTYP_CHR) { prd->copen = 0; } else { prd->bopen = 0; } /* if all opens are closed, an exclusive one is closed */ prd->xopen = 0; vsema(&prd->queue); DBGMSG4("ramdrive close: flag %x copen %d bopen %d xopen %d\n", flag, prd->copen, prd->bopen, prd->xopen); return 0; } /************************************************************************** | rd_ioctl() is called for ioctl(2), which can only be used on a character | device. Disk ioctl command numbers for are in sys/dkio.h. | DIOCREADCAPACITY: supported just for fun. | DIOCGETVH: supported because /etc/mkfs and other tools use it (which | explains why you apply mkfs to the character, not the block, device). | DIOCSETVH: allows a program to change the "volume header" info. | DIOCFORMAT: clears the device contents to 0, rewrites the vol header. | | The DIOC(S|G)ETVH calls use only the info in the per-device structure | in memory. We make no attempt to keep that info in step with the | contents of sector 0 of the simulated media. 
This is consistent with | other current IRIX disk drivers. This has the implications that: | - you can change the driver's idea of the disk geometry on the fly, | without actually formatting the disk, this is useful for scsi. | - if you want to make a permanent change in the volume header, | -- one, that's a bad idea, use dvhtool(1) instead, but | -- two, if you insist, you need both a write to sector 0 and | a call to ioctl(,DIOCSETVH) to keep the driver up to date. | | Neither DIOCSETVH nor DIOCFORMAT hold the semaphore. You are strongly | advised to do an exclusive open before calling them (but mkfp doesn't). |**************************************************************************/ int rd_ioctl(dev_t dev, int cmd, caddr_t arg, int mode, cred_t *pcred, int *rval) { register rd_info_t *prd = INFOPTR(dev); register int error = 0; register caddr_t kmemadr; register int len = 0; register int dir = 0; /* copyout */ int capacity; switch(cmd) { case DIOCGETVH: { kmemadr = (caddr_t)(&prd->vh); len = sizeof(prd->vh); DBGMSG1("DIOCGETVH on %d\n",dev); break; } case DIOCREADCAPACITY: { capacity = prd->size/NBPSCTR; kmemadr = (caddr_t)(&capacity); len = sizeof(capacity); DBGMSG2("DIOCREADCAPACITY on %d = %d\n", dev,capacity); break; } case DIOCSETVH: { kmemadr = (caddr_t)(&prd->vh); len = sizeof(prd->vh); dir = 1; /* copyin */ DBGMSG1("DIOCSETVH on %d done\n",dev); break; } case DIOCFORMAT: { rd_format(prd); DBGMSG1("DIOCFORMAT done on %d!\n",dev); break; } default: { DBGMSG2("ramdrive invalid ioctl %x on %d\n",cmd,dev); error = EINVAL; } } /* switch(cmd) */ /* || Perform the copy to or from user space if needed. 
*/ if ((!error) && (len)) { if (!dir) { DBGMSG3("ioctl copy kmem %x -> usr %x for %d\n", kmemadr, arg, len); error = copyout(kmemadr,arg,len); } else { DBGMSG3("ioctl copy usr %x -> kmem %x for %d\n", arg, kmemadr, len); error = copyin(arg,kmemadr,len); } #ifdef DEBUG if (error) DBGMSG1("error %d on ioctl copy\n",error); #endif } *rval = error; /* ensure user gets correct code */ return error; } /************************************************************************** | I/O Operations: | | rd_strategy() performs all actual I/O. Called directly by file systems | to read and write full I/O page units aligned on I/O page boundaries. | Called indirectly to implement character I/O in any length and alignment. | | rd_read() and rd_write are called by read()/write() to a character | device. They defer to rd_strategy via uiophysio(). This is consistent | with the operation of other IRIX disk drivers. | | The strategy code simply does a bcopy. This is highly unrealistic. | A real device driver would have to deal with efficient sequencing of | track numbers and with asynchronous interrupts. |**************************************************************************/ int rd_strategy(register struct buf *pbuf) { register rd_info_t *prd = INFOPTR(pbuf->b_edev); register __psint_t offset = pbuf->b_blkno * NBPSCTR; register __psint_t count = pbuf->b_bcount; register caddr_t target = (caddr_t)((__psint_t)prd->base)+offset; DBGMSG3("rd_strategy: edev %d, flags %x, blkno %x\n", pbuf->b_edev,pbuf->b_flags,pbuf->b_blkno); DBGMSG3(" : offset %x, count %x, dmaadr %x\n", offset,count,(caddr_t)pbuf->b_dmaaddr); if (!VALIDIO(prd,offset,count)) { DBGMSG0("rejecting strategy with ENOSPC\n"); pbuf->b_error = ENOSPC; iodone(pbuf); return 0; } /* || Ensure that pbuf->b_dmaaddr is a valid kernel address. || This is never needed when called via uiophysio, only when || called from the file system or paging subsystem. (Goodness! || wouldn't it be fun to use a ramdrive for swapping?) 
|| NOTE: while a simple bp_mapin() call works, this approach || would impose unnecessary overhead in a real driver when || the device does not support scatter/gather. */ if (!BP_ISMAPPED(pbuf)) { bp_mapin(pbuf); DBGMSG1(" : after bp_mapin dmaadr %x\n", pbuf->b_dmaaddr); } /* || Grab the device semaphore. Note: this ensures consistency || between reads and writes, but does not control modifications || made through memory-mapped access. */ psema(&prd->queue,PZERO+1 | PCATCH); /* || Perform the "read" or "write." */ if (pbuf->b_flags & B_READ) { DBGMSG3(" : read %x to %x for %x\n", target,pbuf->b_dmaaddr,pbuf->b_bcount); bcopy(target,pbuf->b_dmaaddr,pbuf->b_bcount); } else { DBGMSG3(" : write %x to %x for %x\n", pbuf->b_dmaaddr,target,pbuf->b_bcount); bcopy(pbuf->b_dmaaddr,target,pbuf->b_bcount); } vsema(&prd->queue); iodone(pbuf); return 0; } int rd_read(dev_t dev, uio_t *puio, cred_t *pcred) { DBGMSG1("rd_read entered for dev %d\n",dev); return uiophysio(rd_strategy,0,dev,B_READ,puio); } int rd_write(dev_t dev, uio_t *puio, cred_t *pcred) { DBGMSG1("rd_write entered for dev %d\n",dev); return uiophysio(rd_strategy,0,dev,B_WRITE,puio); } int rd_size(dev_t dev) { DBGMSG1("rd_size entered for dev %d\n",dev); return rd_array[geteminor(dev)].size/NBPSCTR; } /************************************************************************** | Memory mapping: rd_map() (one "m") is called to implement an mmap() | request on a character device. We permit read and write mappings, which | means that in a multiprocessor, one CPU could be updating the kernel | memory that represents the medium while another CPU executes a read() | on the same memory. | | Since a map can persist after the corresponding FD is closed, we | keep track of mappings separately from opens. 
***************************************************************************/ int rd_map(dev_t dev, vhandl_t *pvh, off_t off, int len, int prot) { register rd_info_t *prd = INFOPTR(dev); int error; DBGMSG3("map request on %d at %x for %x\n",dev,off,len); if (VALIDIO(prd,off,len)) { error = v_mapphys(pvh,prd->base+off,len); #ifdef DEBUG if (error) DBGMSG1("v_mapphys returns %d\n",error); #endif } else { DBGMSG0("rejecting map with ENOSPC\n"); error = ENOSPC; } if (!error) ++prd->nmmap; return error; } rd_unmap(dev_t dev, vhandl_t *pvh) { register rd_info_t *prd = INFOPTR(dev); if (prd->nmmap) { --prd->nmmap; DBGMSG2("unmap on %d, map count now %d\n",dev,prd->nmmap); } else { DBGMSG1("unmap on %d when map count 0 ?!?!?!?\n",dev); } return 0; } /************************************************************************** | Unload support: rd_unload() is called when ml(1) is asked to unload | this driver. We test to make sure that none of our devices that have | been initialized, are in use. When any are in use, we return EBUSY | and so will not be unloaded. ***************************************************************************/ int rd_unload(void) { int j; for (j = 0; j<rd_numdevs; ++j) { if (( rd_array[j].base ) && ( rd_array[j].copen ||rd_array[j].bopen ||rd_array[j].nmmap) ) { DBGMSG1("rejecting unload because dev %d busy\n",j); return EBUSY; } } DBGMSG0("accepting unload, byeeeee\n"); return 0; }